# load companies file of EIN to name and endowment data

companies_to_ein <- read_rds(here('data',   'companies.RDS'))


endowment_data <- read_rds(here("data", 
                                "endowments_by_most_recent_filings.RDS")) %>%
  select(-c(EndowmentsHeldUnrelatedOrgInd, EndowmentsHeldRelatedOrgInd)) %>%
  pivot_longer(-c(EIN, fiscal_year),
               names_to = "variable_name") %>%
  left_join(companies_to_ein) %>%
  mutate(fiscal_year=as.numeric(paste(fiscal_year)))
# extract return dates
source(here("GET_VARS.R"))

files <- dir(here("ballet_990_released_20230208"),
              full.names = TRUE)


dates <- map_df(files,
                ~get_df(filename = .x, 
                        variables = c("//Return//ReturnHeader//TaxPeriodEndDt"))) %>%
  mutate(fiscal_year = as.numeric(paste(fiscal_year))) %>%
   filter_ein()

saveRDS(dates, here('data', 'dates.RDS')) 
dates <- readRDS( here('data', 'dates.RDS')) %>%
  select(EIN, TaxPeriodEndDt, fiscal_year) 
 

endowment_data <- endowment_data %>%
  mutate(fiscal_year=as.numeric(paste(fiscal_year))) %>%
  left_join(dates)
# function to plot variables of interest against each other
plot_ranks <- function(var1, var2, data) {

  
   plt <- data %>%
    group_by(fiscal_year) %>%
   # arrange(var1) %>%
    mutate("{var1}_rank" := rank(-!!sym(var1)), na.last = "keep") %>%
#    arrange(var2) %>%
    mutate("{var2}_rank"  := rank(-!!sym(var2)),  na.last = "keep") %>%
    ggplot(aes(x = !!sym(glue("{var1}_rank" )), y =!!sym(glue("{var2}_rank" )),
               color  = organization_name,
               label =EIN
               )) +
    geom_point() +
    geom_function(fun=function(x)x,color="darkred", alpha = .8) +
    labs(x = paste0(var1, " Rank"),
         y =  paste0(var2, " Rank")) +
    theme_bw() +
    labs(title = glue("Rank of {var2} vs. Rank of {var1}")) +
    viridis::scale_color_viridis(discrete=TRUE,
                                 option = "rocket",
                                 end = .9) +
     facet_wrap(~fiscal_year)+
      theme(plot.title = element_text(size = 14, 
                                      hjust = .5, face="bold",),
            plot.subtitle = element_text(hjust = .5, 
                                         face="italic",
                                         size = 14),
            axis.title = element_text(size = 13, 
                                      face = "bold")) 
  
  ggplotly(plt, margin = m, height = 550) %>%
    partial_bundle()

}

plot_ranks_by_consistency <- function(var1, var2, data) {

  
   plt <- data %>%
     filter(fiscal_year > 2010 & fiscal_year < 2021) %>%
    group_by(fiscal_year) %>%
   # arrange(var1) %>%
    mutate("{var1}_rank" := rank(-!!sym(var1)), na.last = "keep") %>%
#    arrange(var2) %>%
    mutate("{var2}_rank"  := rank(-!!sym(var2)),  na.last = "keep")  %>%
     mutate(rank_diff = !!sym(glue("{var2}_rank")) - !!sym(glue("{var1}_rank" ))) %>%
   group_by(EIN) %>%
   mutate(sum_pos = sum(rank_diff >0, na.rm=TRUE),
          sum_neg = sum(rank_diff < 0,  na.rm=TRUE),
          sum_zero = sum(rank_diff ==0, na.rm=TRUE))%>%
     ungroup() %>% 
   mutate(category = case_when(sum_pos != 0 & sum_neg != 0 ~ "Had Some Change",
                               sum_zero ==  sum_pos + sum_neg + sum_zero ~ "Always Ranked the Same",
                            sum_pos == sum_pos + sum_neg + sum_zero ~ paste("Always Ranked Better in", var1),
                            sum_neg == sum_pos + sum_neg + sum_zero ~ paste("Always Ranked Better in", var2 ))) %>%

    ggplot(aes(x = !!sym(glue("{var1}_rank" )), y =!!sym(glue("{var2}_rank" )),
               color  = category,
               text =organization_name
               )) +
    geom_point() +
    geom_function(fun=function(x)x,color="darkred", alpha = .8) +
    labs(x = paste0(var1, " Rank"),
         y =  paste0(var2, " Rank"),
         color = "Consistency of Rankings") +
    theme_c() +
    labs(title = glue("Rank of {var2} vs. Rank of {var1}")) +
      scale_color_brewer(palette ="Set2") +
     facet_wrap(~fiscal_year)+
      theme(plot.title = element_text(size = 14, 
                                      hjust = .5, face="bold",),
            plot.subtitle = element_text(hjust = .5, 
                                         face="italic",
                                         size = 14),
            axis.title = element_text(size = 13, 
                                      face = "bold")) 
  
  ggplotly(plt, margin = m, height = 550) %>% partial_bundle() 

}

plot_ranks_by_consistency <- function(var1, var2, data) {

  
   plt <- data %>%
     filter(fiscal_year > 2010 & fiscal_year < 2021) %>%
    group_by(fiscal_year) %>%
   # arrange(var1) %>%
    mutate("{var1}_rank" := rank(-!!sym(var1)), na.last = "keep") %>%
#    arrange(var2) %>%
    mutate("{var2}_rank"  := rank(-!!sym(var2)),  na.last = "keep")  %>%
     mutate(rank_diff = !!sym(glue("{var2}_rank")) - !!sym(glue("{var1}_rank" ))) %>%
   group_by(EIN) %>%
   mutate(sum_pos = sum(rank_diff >0, na.rm=TRUE),
          sum_neg = sum(rank_diff < 0,  na.rm=TRUE),
          sum_zero = sum(rank_diff ==0, na.rm=TRUE))%>%
     ungroup() %>% 
   mutate(prop_positive = sum_pos / (sum_pos + sum_neg + sum_zero)) %>%
    ggplot(aes(x = !!sym(glue("{var1}_rank" )), y =!!sym(glue("{var2}_rank" )),
               color  = prop_positive,
               text =organization_name
               )) +
    geom_function(fun=function(x)x,color="darkred", alpha = .8, n =201) +
    geom_point() +
    labs(x = paste0(var1, " Rank"),
         y =  paste0(var2, " Rank"),
         title = glue("Rank of {var2} vs. Rank of {var1}"),
         color = glue("Proportion Where {var1}\nRanked Better than\n{var2}")) +
    theme_c(legend.text=element_text(size =8)) +
    scale_color_gradient2(high="#5935CF", low="#D18E01", mid="#E2E2E2", limits=c(0,1), midpoint=.5) +
     facet_wrap(~fiscal_year)+
      theme(plot.title = element_text(size = 12, 
                                      hjust = .5, face="bold",),
            plot.subtitle = element_text(hjust = .5, 
                                         face="italic",
                                         size = 14),
            axis.title = element_text(size = 13, 
                                      face = "bold")) 
  
  ggplotly(plt, margin = m, height = 550) %>% partial_bundle()

}

# 
# print_df_category <- function(data, var1, var2) {
#   data %>%
#      filter(fiscal_year > 2010 & fiscal_year < 2021) %>%
#     group_by(fiscal_year) %>%
#    # arrange(var1) %>%
#     mutate("{var1}_rank" := rank(-!!sym(var1)), na.last = "keep") %>%
# #    arrange(var2) %>%
#     mutate("{var2}_rank"  := rank(-!!sym(var2)),  na.last = "keep")  %>%
#      mutate(rank_diff = !!sym(glue("{var2}_rank")) - !!sym(glue("{var1}_rank" ))) %>%
#    group_by(EIN) %>%
#    mutate(sum_pos = sum(rank_diff >0, na.rm=TRUE),
#           sum_neg = sum(rank_diff < 0,  na.rm=TRUE),
#           sum_zero = sum(rank_diff ==0, na.rm=TRUE))%>%
#      ungroup() %>% 
#    mutate(prop_positive = sum_pos / (sum_pos + sum_neg + sum_zero)) %>%
#     group_by()
# }
# 


# function to plot variables of interest against each other
plot_combo <- function(var1, var2, data) {
  
  data %>%
    ggplot(aes(x = !!sym(var1), y = !!sym(var2), color = EIN)) +
    geom_point(alpha = .9) +
   # geom_line(alpha = .5) +
    facet_wrap(~fiscal_year) +
    viridis::scale_color_viridis(discrete=TRUE,
                                 option = "rocket",
                                 end = .9) +
    theme_bw()+
      theme(plot.title = element_text(size = 18, 
                                      hjust = .5, face="bold",),
            plot.subtitle = element_text(hjust = .5, 
                                         face="italic",
                                         size = 16),
            axis.title = element_text(size = 13, 
                                      face = "bold"),
            legend.position = "none",
            axis.text.x = element_text(angle = 60, vjust = .6)) +
    scale_x_continuous(labels=comma) +
    scale_y_continuous(labels=comma) +
    labs(title = paste0(var2, " vs. ", var1),
         subtitle = "Fill by EIN")
  
}


endowment_data_wide <- endowment_data %>% 
  pivot_wider(names_from=variable_name,
              values_from=value) 

Plotting Endowment Variables Against Each Other, By Year

vars <-  unique(endowment_data$variable_name)[!grepl("EOY|Admin|Grants", unique(endowment_data$variable_name))]

# pairwise combinations of variables
variable_combinations <- t(combn(vars, 2)) %>%
  as.data.frame()

if (!all_plots) variable_combinations <- variable_combinations[1:4,]
cat('## Scale of Original Variables {.tabset}   \n\n')

pwalk(variable_combinations, ~{ 
 cat('### ',paste0(.x, ", ", .y),'\n\n')
 plt <- plot_combo(var1 = .x, var2 = .y, data = endowment_data_wide)
 print(plt)
  cat('\n\n')
 }
)

Where Endowments are Held, By Fiscal Year

plt <- endowment_data_wide %>%
  select(contains("EOY"),fiscal_year, 
         EIN, organization_name) %>%
  pivot_longer(cols = contains("EOY"))  %>%
  mutate(name = case_when(
    name == "TermEndowmentBalanceEOYPct" ~ "Temporarily restricted endowment",
    name == "PrmnntEndowmentBalanceEOYPct" ~ "Permanent endowment",
    name == "BoardDesignatedBalanceEOYPct" ~ "Board designated or quasi-endowment"
  )) %>%
  mutate(value = 100*value) %>%
  filter(!is.na(value)) %>%
  ggplot(aes(x=fiscal_year,
             y  = value, 
             color = organization_name)) +
  geom_line() +
  facet_wrap(~name) +
  theme_c(strip.text = element_text(margin =margin(3,0,25,0),
                                    size =4,
                                    lineheight=.5)) +
  scale_color_viridis(option="rocket", discrete= TRUE) +
  labs(y="Percentage of Endowment in Category",
       x = "Fiscal Year") 

ggplotly(plt, margin = m, height =550)  %>% partial_bundle()

Rankings

Relationships between how companies rank in different variables

plotlist <- pmap(variable_combinations, ~{ 
 #plt <- plot_ranks(var1 = .x, var2 = .y, data = endowment_data_wide)
   plt <- plot_ranks_by_consistency(var1 = .x, var2 = .y, data = endowment_data_wide) %>% partial_bundle()

 }
)


htmltools::tagList(setNames(plotlist, NULL))

How ranks change over time

# 
# 
# rankings_over_time <- endowment_data_wide %>%
#   filter(fiscal_year >=2011 & fiscal_year <=2020) %>%
#   group_by(fiscal_year) %>%
#   mutate(rank = rank(-BeginningYearBalanceAmt, na.last = "keep")) %>%
#   filter(!is.na(rank)) %>%
#   ggplot(aes(x=fiscal_year, y = rank, color =organization_name)) +
#   geom_line() +
#   scale_color_viridis(option = "rocket", discrete=TRUE) +
#   theme_c()
# 
# ggplotly(rankings_over_time, margin = m, height = 550) 


plot_ranks_over_time <- function(dat,var) {
  plt <- dat %>%
    filter(fiscal_year >=2011 & fiscal_year <=2020) %>%
    group_by(fiscal_year) %>%
    mutate(rank = rank(-!!sym(var), na.last = "keep")) %>%
    filter(!is.na(rank)) %>%
    ggplot(aes(x=fiscal_year, y = rank, color =organization_name)) +
    geom_line() +
    geom_point(alpha = .3, size =.5) +
    scale_color_viridis(option = "rocket", discrete=TRUE) +
    theme_c() +
    scale_y_continuous(n.breaks = 10) +
    scale_x_continuous(breaks=2011:2020) +
    labs(y = paste("Rank of ", var),
         x = "Fiscal Year",
         title = paste0("Rank over time for ", var))
  ggplotly(plt, margin = m, height = 550)
}

plotlist <- map(vars,~ {
  plt<- plot_ranks_over_time(endowment_data_wide,.x) %>% partial_bundle()
})


htmltools::tagList(setNames(plotlist, NULL))

Compensation

m <- list(
    l = 50,
    r = 50,
    b = 50,
    t = 150,
    pad = 0.5
)



source(here("GET_VARS.R"))

files <- dir(here("ballet_990_released_20230208"),
              full.names = TRUE)

get_all_children_by_file(files[5], "/Return/ReturnData/IRS990" )

##################################
# EMPLOYEE INFORMATION
##################################


employee_comp_vars <- c(
  "/Return/ReturnData/IRS990/CYTotalRevenueAmt",
  "/Return/ReturnData/IRS990/TotalEmployeeCnt",
  "/Return/ReturnData/IRS990/EmployeeCnt",
  "/Return/ReturnData/IRS990/CYSalariesCompEmpBnftPaidAmt",
  "/Return/ReturnData/IRS990/CompCurrentOfcrDirectorsGrp/TotalAmt")


employees <- map_df(files, ~get_df(filename = .x, 
                              variables=employee_comp_vars))

employees %>%
  select(-filename) %>%
  mutate(across(-c(ReturnTs,EIN, fiscal_year), 
                as.numeric)) %>%
  rename(OffDirCompAmt =TotalAmt ) %>%
  saveRDS(here("data", "employees.RDS"))




#################
# SCHEDULE J
#################
comp <- map_df(files, ~get_df(filename = .x, schedule = "j"))


comp_clean <- comp %>%
  rename_with(.cols= everything(),
              ~gsub('/Return/ReturnData/IRS990ScheduleJ/', '', .)) %>% 
  select(-contains("Ind")) %>%
  select(fiscal_year, EIN,
         contains("RltdOrgOfficerTrstKeyEmplGrp")) %>%
  # only extract cols within the RltdOrgOfficerTrstKeyEmplGrp
  select(EIN, fiscal_year,
         matches("RltdOrgOfficerTrstKeyEmplGrp\\[.*.\\]/")) %>%
  pivot_longer(-c(EIN,fiscal_year)) %>%
  mutate(id = gsub("\\D", "", name),
       #  name_old = name,
          name = gsub(".*./", "", name),
         id = gsub("990", "", id))

  
comp_clean <- comp_clean %>%
  filter(!is.na(value)) %>%
  distinct() %>% 
  pivot_wider(names_from = name, values_from = value) 



comp_clean <- comp_clean %>%
  mutate(across(contains("Amt"), as.numeric))%>%
  mutate(TitleTxt=tolower(TitleTxt))
  
saveRDS(comp_clean, here("data", "schedj.RDS"))
comp_clean <- read_rds(here("data", "schedj.RDS"))%>%
  left_join(companies_to_ein) %>%
  mutate(fiscal_year = as.numeric(paste(fiscal_year)))

employees <- readRDS(here("data", "employees.RDS")) %>%
    mutate(fiscal_year = as.numeric(paste(fiscal_year)))
# clean up title text field because it was free text in the form 990
comp_clean <- comp_clean %>% 
  mutate(TitleTxt = gsub("dancer/choreographer",
                          "dancer / choreographer",
                         TitleTxt),
         TitleTxt = gsub("vp", "Vice President", TitleTxt),
         TitleTxt = gsub("dorector", "director",TitleTxt),
  title_clean = case_when(
    grepl("ceo", TitleTxt, ignore.case = TRUE ) ~"CEO",
    grepl("cfo", TitleTxt, ignore.case = TRUE)~ "Chief Financial Officer",
    grepl("executive dir", TitleTxt, ignore.case = TRUE) ~"Executive Director",
    grepl("artistic dir",TitleTxt,  ignore.case = TRUE) ~"Artistic Director",
    grepl("emeritus|emerita", TitleTxt, ignore.case = TRUE) ~"Emirita/Emiritus Position",
    grepl( "chief dev",TitleTxt, ignore.case=TRUE) &  
    grepl("officer",TitleTxt,  ignore.case = TRUE) ~"Chief Development Officer",
     grepl("director of market|marketing director",TitleTxt,  ignore.case = TRUE) ~ "Director of Marketing",
    grepl("music director",TitleTxt,  ignore.case = TRUE) ~"Music Director",
    grepl("mktg", TitleTxt, ignore.case = TRUE ) & 
      grepl("officer|ofc", TitleTxt, ignore.case = TRUE ) ~ "Marketing Officer",
    grepl("Director of Development",TitleTxt, ignore.case = TRUE) ~ "Director of Development",
    grepl("chief",TitleTxt, ignore.case = TRUE) & 
      grepl("officer",TitleTxt, ignore.case = TRUE) ~ "Other Chief Officer",
    grepl("Dir of Legal",TitleTxt, ignore.case = TRUE) ~"Director of Legal Affairs",
    grepl("Former Senior Dir", TitleTxt, ignore.case = TRUE) ~ "Former Senior Director",
    grepl("Director|Dir", TitleTxt, ignore.case = TRUE) ~ "Other Director",
    grepl("Director", TitleTxt, ignore.case = TRUE) ~ "Other Director",
    TRUE ~ TitleTxt
  )) 

Number of EINs with Each Title

# number of EINs with each type of title
comp_clean %>%
  group_by(title_clean) %>%
  summarize(`Number of EINs` = n_distinct(EIN)) %>% 
  arrange(desc(`Number of EINs`))

Number of Individuals with Title

# number of individuals with title
comp_clean %>%
  mutate(title_clean=tolower(title_clean)) %>%
  filter(!is.na(title_clean)) %>%
  group_by(title_clean) %>%
  summarize(`Number of Individuals in Position` = n()) %>% 
  arrange(desc(`Number of Individuals in Position`))

# missingness by variable
# comp_clean %>%
#   select(-c(EIN,fiscal_year,id)) %>%
#   is.na() %>% 
#   colSums() %>%
#   as_tibble(rownames="Variable") %>%
#   mutate(`Not Missing` = nrow(comp_clean) - value) %>%
#   select(-value)

Compensation by Title

Base Compensation

comp_clean %>% 
  group_by(title_clean) %>%
  mutate(m = median(BaseCompensationFilingOrgAmt, na.rm= TRUE)) %>%
  filter(!is.na(title_clean)) %>%
  ungroup() %>%
  ggplot(aes(x=fct_reorder(title_clean,m),
             y = BaseCompensationFilingOrgAmt)) +
  geom_jitter(alpha = .5, size = .5, height = 0, width = .05) +
  coord_flip() +
  theme_bw() +
  labs(title = "Base Compensation by Title",
       x = "Title")+
  theme(plot.title = element_text(size = 18, 
                                      hjust = .5, face="bold",),
            plot.subtitle = element_text(hjust = .5, 
                                         face="italic",
                                         size = 16),
            axis.title = element_text(size = 13, 
                                      face = "bold"),
         axis.text.x= element_text(size = 8))

Total Compensation

comp_clean %>% 
  group_by(title_clean) %>%
  mutate(m = median(TotalCompensationFilingOrgAmt, na.rm= TRUE)) %>%
  filter(!is.na(title_clean)) %>%
  ungroup() %>%
  ggplot(aes(x=fct_reorder(title_clean,m),
             y = TotalCompensationFilingOrgAmt)) +
  geom_jitter(alpha = .5, size = .5, height = 0, width = .05) +
  coord_flip() +
  theme_bw() +
  labs(title = "Total Compensation by Title",
       x = "Title")+
  theme(plot.title = element_text(size = 18, 
                                      hjust = .5, face="bold",),
            plot.subtitle = element_text(hjust = .5, 
                                         face="italic",
                                         size = 16),
            axis.title = element_text(size = 13, 
                                      face = "bold"),
         axis.text.x= element_text(size = 8))

Compensation by Year

plt <- comp_clean %>%
  group_by(EIN, fiscal_year) %>%
  summarize(total_compensation = sum(BaseCompensationFilingOrgAmt)) %>%
  group_by(EIN) %>%
  mutate(m = median(total_compensation, na.rm= TRUE)) %>%
  ungroup() %>%
#  group_by(EIN) %>%
  mutate(tile = ntile(m,2),
         tilename = ifelse(tile == 1,
                           "EINs Below the Median",
                           "EINs Above the Median"),
         tilename = factor(tilename, levels = c( "EINs Below the Median",
                                                  "EINs Above the Median"))) %>%
  ggplot(aes(x=fiscal_year,
             y = total_compensation,
            color = EIN,
            group = EIN)) +
  geom_line() +
  geom_point() +
  labs(title = "Compensation to Highest Paid Employees",
       subtitle = "Total Base Compensation to Highest Paid Employees By EIN",
       y = "Total Compensation",
       x = "Fiscal Year")+
    viridis::scale_color_viridis(discrete=TRUE,
                                 option = "rocket",
                                 end = .9) +
    theme_bw()+
    theme(plot.title = element_text(size = 18, 
                                      hjust = .5, face="bold",),
            plot.subtitle = element_text(hjust = .5, 
                                         face="italic",
                                         size = 16),
            axis.title = element_text(size = 13, 
                                      face = "bold"),
          axis.text.x = element_text(size =8, vjust = .6, angle = 60)) +
  facet_wrap(~tilename, scales = "free_y") +
  scale_y_continuous(labels = comma) +
  scale_x_continuous(labels = comma)


ggplotly(plt,height = 500, width =850) %>%
  layout(margin = m) %>%partial_bundle()
# plot compensation versus beginning year balance by fiscal year
comp <- comp_clean %>% 
  mutate(fiscal_year = as.numeric(paste(fiscal_year))) %>%
  left_join(endowment_data_wide)  %>%
  group_by(EIN, fiscal_year, BeginningYearBalanceAmt, organization_name) %>%
  summarize(total_compensation = sum(BaseCompensationFilingOrgAmt)) 
  
  
plt <- comp %>%
  ggplot(aes(x=BeginningYearBalanceAmt,
             y = total_compensation,
             label = organization_name,
            color = EIN)) +
  geom_point() +
  facet_wrap(~fiscal_year, nrow = 2)+
    theme_bw()+
    theme(plot.title = element_text(size = 18, 
                                      hjust = .5, face="bold",),
            plot.subtitle = element_text(hjust = .5, 
                                         face="italic",
                                         size = 16),
            axis.title = element_text(size = 13, 
                                      face = "bold"))+
    viridis::scale_color_viridis(discrete=TRUE,
                                 option = "magma",
                                 end = .9) +
  labs(title = "Total Base Compensation to Highest Paid Employees\nby Beginning of Year Balance",
      x = "Beginning of Year Balance",
      y = "Total Compensation")

ggplotly(plt, height = 500, width = 850) %>%
  layout(margin = m) %>% partial_bundle()
# logged scales
plt <- comp %>%
  ggplot(aes(x=BeginningYearBalanceAmt,
             y = total_compensation,
            color = EIN)) +
  geom_point() +
  facet_wrap(~fiscal_year, nrow = 2)+
    theme_bw()+
    theme(plot.title = element_text(size = 18, 
                                      hjust = .5, face="bold",),
            plot.subtitle = element_text(hjust = .5, 
                                         face="italic",
                                         size = 16),
            axis.title = element_text(size = 13, 
                                      face = "bold"))+
    viridis::scale_color_viridis(discrete=TRUE,
                                 option = "magma",
                                 end = .9) +
  scale_x_log10() +
  scale_y_log10() +
  labs(title = "Total Base Compensation to Highest Paid Employees\nby Beginning of Year Balance",
       subtitle  = "Both Axes on Log Scale"
      x = "Beginning of Year Balance",
      y = "Total Compensation")


ggplotly(plt, height = 500, width = 850) %>%
  layout(margin = m) %>%
  partial_bundle()
## Revenue 


emp <- employees %>%
  filter(EIN %in% sample(employees$EIN, 5)) %>%
  group_by(EIN) %>%
  arrange(fiscal_year) %>%
  mutate(pct_change_comp = (CYSalariesCompEmpBnftPaidAmt - lag(CYSalariesCompEmpBnftPaidAmt, n =1))/  lag(CYSalariesCompEmpBnftPaidAmt, n =1),
         pct_change_revenue = (CYTotalRevenueAmt - lag(CYTotalRevenueAmt, n =1))/  lag(CYTotalRevenueAmt, n =1)) 

adj <- median(emp$pct_change_comp,na.rm=TRUE)/ median(emp$pct_change_revenue,na.rm=TRUE)

emp %>%
  ggplot(aes(x= fiscal_year , group=EIN)) +
  geom_line(aes(y= pct_change_revenue *adj)) +
  geom_line(aes(y= pct_change_comp),color='darkred') +
  facet_wrap(~EIN, scales="free_y") +
  scale_y_continuous(sec.axis = ~./adj) +
  theme_c()

Ranking of Beginning of Year Balance Compared to Ranking of C-Suite Compensation

plot_ranks("BeginningYearBalanceAmt",
           "total_compensation", data = comp )


Top Employees Compensation Compared to Other Employee Compensation

  • For total employee compensation - CYSalariesCompEmpBnftPaidAmt: Salaries, other compensation, employee benefits (Part IX, column (A), lines 5–10).
  • For top employee compensation - Schedule J, looking at total compensation
top <- comp_clean %>% 
  group_by(EIN,fiscal_year,organization_name) %>% 
  mutate(not_deferred = TotalCompensationFilingOrgAmt -DeferredCompensationFlngOrgAmt) %>%
  summarize(num_top_employees = n(),
            compensation_top_total = sum(TotalCompensationFilingOrgAmt),
            compensation_top_base = sum(BaseCompensationFilingOrgAmt),
            compensation_top_not_def = sum(not_deferred)) %>%
  ungroup()

emp_comp <- employees %>% 
  left_join(companies_to_ein) %>%
  left_join(top) 
plt <- emp_comp %>%
  ggplot(aes(x=fiscal_year,
             y = compensation_top_total/CYSalariesCompEmpBnftPaidAmt,
             color = organization_name,
             group= EIN)) +
  geom_point() +
  geom_line()+
  viridis::scale_color_viridis(discrete=TRUE,
                                 option = "rocket",
                                 end = .9) +
  theme_bw() +
  labs(y="Fraction of Total Compensation Paid",
       title = "Fraction of Total Compensation Paid to C-Suite Employees",
       x="Fiscal Year") +
  scale_y_continuous(n.breaks = 6)+
    theme(plot.title = element_text(size = 14,
                                      hjust = .5, face="bold",),
            plot.subtitle = element_text(hjust = .5,
                                         face="italic",
                                         size = 12),
            axis.title = element_text(size = 13,
                                      face = "bold"))


ggplotly(plt, margin = m, height = 500) %>%
  partial_bundle()
 # plt <- emp_comp %>%
 #   filter(fiscal_year !=2014 & fiscal_year !=2021) %>% 
 #  ggplot(aes(y=CYSalariesCompEmpBnftPaidAmt/TotalEmployeeCnt, 
 #             x = compensation_top_base/num_top_employees,
 #             label=fiscal_year,
 #             color =organization_name,
 #             group = EIN)) +
 #  geom_point() +
 #   theme_bw() + 
 #  viridis::scale_color_viridis(discrete=TRUE,
 #                                 option = "rocket",
 #                                 end = .9) +
 #    theme(plot.title = element_text(size = 14, 
 #                                      hjust = .5, face="bold",),
 #            plot.subtitle = element_text(hjust = .5, 
 #                                         face="italic",
 #                                         size = 14),
 #            axis.title = element_text(size = 13, 
 #                                      face = "bold")) +
 #   scale_y_continuous(labels =comma) +
 #   labs(y = "Average Employee Compensation",
 #        x = "Average C-Suite Compensation",
 #        title = "Average C-Suite Compensation vs. Overall Average Employee Compensation") +
 #   facet_wrap(~fiscal_year)
 # 
 # ggplotly(plt, margin = m,height = 500)
 # 
 # 
 # 
 
 
 
 plt <- emp_comp %>%
   mutate(num_not_top = TotalEmployeeCnt - num_top_employees,
          compensation_not_top = CYSalariesCompEmpBnftPaidAmt - compensation_top_total,
          avg_not_top = compensation_not_top/num_not_top ,
          avg_top = compensation_top_total / num_top_employees) %>%
   filter(fiscal_year !=2014 & fiscal_year !=2021) %>% 
   # something strange with Aspen Santa Fe 
   filter(avg_not_top > 0) %>% 
  ggplot(aes(y=avg_not_top, 
             x =avg_top,
             label=fiscal_year,
             color =organization_name,
             group = EIN)) +
  geom_point() +
   theme_c() + 
  viridis::scale_color_viridis(discrete=TRUE,
                                 option = "rocket",
                                 end = .9) +
    theme(plot.title = element_text(size = 12, 
                                      hjust = .5, face="bold",
                                      margin = margin(5,5,5,5)),
            plot.subtitle = element_text(hjust = .5, 
                                         face="italic",
                                         size = 14),
            axis.title = element_text(size = 13, 
                                      face = "bold"),
          axis.text.x = element_text(angle = 20, vjust = .6)) +
   scale_y_continuous(labels =comma) +
   labs(y = "Average Employee Compensation",
        x = "Average C-Suite Compensation",
        title = "Average C-Suite Pay versus Average Employee Compensation") +
   facet_wrap(~fiscal_year) +
   scale_x_continuous(labels = comma)
 

marg <- list(
    l = 50,
    r = 50,
    b = 250,
    t = 250,
    pad = 0.5
)
 
 ggplotly(plt, margin = marg, height = 500) %>%
   partial_bundle()
plt <-  emp_comp %>%
   mutate(num_not_top = TotalEmployeeCnt - num_top_employees,
          compensation_not_top = CYSalariesCompEmpBnftPaidAmt - compensation_top_total,
          avg_not_top = compensation_not_top/num_not_top ,
          avg_top = compensation_top_total / num_top_employees) %>%
   group_by(fiscal_year) %>%
   mutate(rank_avg_not_top = rank(-avg_not_top,  na.last ="keep"),
          rank_avg_top = rank(-avg_top, na.last="keep")) %>%
   filter(fiscal_year !=2014 & fiscal_year !=2021) %>%
   ggplot(aes(x =rank_avg_top, y = rank_avg_not_top, color = organization_name)) +
   geom_point() +
   theme_c()+
   labs(y = "Ranking of Average Employee Compensation",
        x = "Ranking of Average C-Suite Compensation",
        title = "Average C-Suite Pay versus Average Employee Compensation") +
   facet_wrap(~fiscal_year)  +
  geom_function(fun=function(x)x,color="darkred", alpha = .8) +
    viridis::scale_color_viridis(discrete=TRUE,
                                 option = "rocket",
                                 end = .9) 
 
 ggplotly(plt, margin = marg, height = 500) %>%
   partial_bundle()
 ranks <- emp_comp %>%
   mutate(num_not_top = TotalEmployeeCnt - num_top_employees,
          compensation_not_top = CYSalariesCompEmpBnftPaidAmt - compensation_top_total,
          avg_not_top = compensation_not_top/num_not_top ,
          avg_top = compensation_top_total / num_top_employees,
          organization_name = ifelse(is.na(organization_name), EIN, organization_name)) %>%
   group_by(fiscal_year) %>%
   mutate(rank_avg_not_top = rank(-avg_not_top,  na.last ="keep"),
          rank_avg_top = rank(-avg_top, na.last="keep")) %>%
   filter(fiscal_year !=2014 & fiscal_year !=2021) %>%
   mutate(rank_diff = rank_avg_top - rank_avg_not_top ) %>%
   group_by(organization_name) %>%
   mutate(sum_pos = sum(rank_diff >0, na.rm=TRUE),
          sum_neg = sum(rank_diff < 0,  na.rm=TRUE),
          sum_zero = sum(rank_diff ==0, na.rm=TRUE)) %>%
   select(fiscal_year, rank_avg_not_top, rank_avg_top, contains("sum"), organization_name) %>%
   ungroup() %>%
   mutate(category = case_when(sum_pos != 0 & sum_neg != 0 ~ "Had Some Change",
                               sum_zero ==  sum_pos + sum_neg + sum_zero ~ "Always Ranked the Same",
                            sum_pos == sum_pos + sum_neg + sum_zero ~  "Always Ranked Better in Average Employee Compensation" ,
                            sum_neg == sum_pos + sum_neg + sum_zero ~ "Always Ranked Better in C-Suite Compensation"),
          prop_positive = sum_pos/(sum_pos+sum_zero+sum_neg)) 
 
 
plt <- ranks %>%
   ggplot(aes(x =rank_avg_top, y = rank_avg_not_top, 
              label = organization_name,
              color = prop_positive)) +
   geom_point() +
   theme_c()+
   labs(y = "Ranking of Average Employee Compensation",
        x = "Ranking of Average C-Suite Compensation",
        title = "Rankings of Average C-Suite Pay versus Average Employee Compensation",
        color = "Proportion Where C-Suite Compensation\nWas Ranked Better Than Average Employee Compensation") +
   facet_wrap(~fiscal_year)  +
  geom_function(fun=function(x)x,color="darkred", alpha = .8) +
 # scale_fill_distiller(palette ="PuOr", low="#EFC950", mid="#E2E2E2") +
  scale_color_gradient2(high="#5935CF", low="#D18E01", mid="#E2E2E2", limits=c(0,1), midpoint=.5)
 
 ggplotly(plt, margin = marg, height = 500) %>%
   partial_bundle()
 emp_comp %>%
   mutate(num_not_top = TotalEmployeeCnt - num_top_employees,
          compensation_not_top = CYSalariesCompEmpBnftPaidAmt - compensation_top_total,
          avg_not_top = compensation_not_top/num_not_top ,
          avg_top = compensation_top_total / num_top_employees) %>%
  filter(!is.na(avg_not_top) & !is.na(avg_top)) %>%
  group_by(EIN) %>%
  mutate(min_year = min(fiscal_year)) %>%
  filter(fiscal_year == min_year | fiscal_year == 2020) %>%
    arrange(fiscal_year) %>%
  mutate(pct_change_comp_top = ifelse(
      lag(avg_top, n =1) != 0,
      (avg_top - lag(avg_top, n =1))/
             lag(avg_top, n =1),
      NA),
          pct_change_comp_not_top  = ifelse(
      lag(avg_not_top, n =1) != 0,
      (avg_not_top - lag(avg_not_top, n =1))/
             lag(avg_not_top, n =1),
      NA)) %>%
  filter(fiscal_year == 2020) %>%
   mutate(diff =pct_change_comp_top - pct_change_comp_not_top) %>%
  left_join(companies_to_ein) %>%
  filter(!is.na(diff)) %>%
  ggplot(aes(x =fct_reorder(organization_name, diff), y = diff)) +
  geom_bar(stat="identity") +
  coord_flip() +
  theme_c() +
  labs(title = "Percent Change in Average Executive Compensation Minus Percent Change in Average Employee Compensation",
       y = "",
       x = "Difference in Percent Changes")



 emp_comp %>%
   mutate(num_not_top = TotalEmployeeCnt - num_top_employees,
          compensation_not_top = CYSalariesCompEmpBnftPaidAmt - compensation_top_total,
          avg_not_top = compensation_not_top/num_not_top ,
          avg_top = compensation_top_total / num_top_employees) %>%
  filter(!is.na(avg_not_top) & !is.na(avg_top)) %>%
  group_by(EIN) %>%
  # mutate(min_year = min(fiscal_year)) %>%
  # filter(fiscal_year == min_year | fiscal_year == 2020) %>%
  #   arrange(fiscal_year) %>%
  mutate(pct_change_comp_top = ifelse(
      lag(avg_top, n =1) != 0,
      (avg_top - lag(avg_top, n =1))/
             lag(avg_top, n =1),
      NA),
          pct_change_comp_not_top  = ifelse(
      lag(avg_not_top, n =1) != 0,
      (avg_not_top - lag(avg_not_top, n =1))/
             lag(avg_not_top, n =1),
      NA)) %>%
   ungroup() %>%
   filter(organization_name != "Aspen Santa Fe Ballet") %>%
   select(pct_change_comp_not_top, pct_change_comp_top, 
          organization_name, EIN, organization_name, fiscal_year) %>%
   #pivot_longer(c(pct_change_comp_not_top, pct_change_comp_top)) %>%
   ggplot(aes(x= pct_change_comp_not_top, y = pct_change_comp_top))+
   geom_point() +
   facet_wrap(~name)

Comparison to Financial Data: S&P 500

sp <- read_csv(here('data','SP500.csv')) %>%
  rename_with(tolower) %>%
  mutate(sp500 = as.numeric(sp500)) %>%
  filter(!is.na(sp500)) %>%
  mutate(month = month(date),
         year = year(date)) %>%
  group_by(month,year) %>%
  arrange(month) %>%
 slice_min(n=1,order_by = date) %>%
  ungroup()


sp <- read_csv(here('data','SP500.csv')) %>%
  rename_with(tolower) %>%
  mutate(sp500 = as.numeric(sp500)) %>%
  filter(!is.na(sp500)) %>%
  mutate(month = month(date),
         year = year(date)) %>%
  group_by(month,year) %>%
  summarize(sp500 = mean(sp500, na.rm=TRUE))

endowment_sp <- endowment_data %>%
  mutate(month = month(TaxPeriodEndDt),
         year= year(TaxPeriodEndDt)) %>%
  # since we don't have TaxPeriodEndDt for previous years,
  # assume month of fiscal year stayed the same
  group_by(EIN) %>%
  fill(month, .direction="up") %>%
  ungroup() %>%
  mutate(year = ifelse(is.na(year), fiscal_year, year)) %>%
  left_join(sp) 

Percentage Change

Notes on Interpretation

We compute the percentage change from one year to the next as

\[100 \times \dfrac{\text{Value in Current Year} - \text{Value in Previous Year}}{ \text{Value in Previous Year}}.\]

For a percent change of value \(P\), we can interpret this as saying the value in the current year is

\[\text{Value Current Year} = (P+1) \times \text{Value in the Previous Year}.\]

For example, a percentage change of 0 means the value in the current year is identical to the previous year, since we have \(\text{Value Current Year} = (1) \times \text{Value in the Previous Year}\).

A percentage change of 1 means the value in the current year is double that of the previous, since we have \(\text{Value Current Year} = (1+1) \times \text{Value in the Previous Year}\).

This interpretation still holds with the enormous percentage changes we have here, for example, Joffrey Ballet’s endowment (beginning of year balance) saw approximately a \(131\%\) increase in 2020 from its value in 2015, and we can interpret this as meaning the value in 2020 was 132 times the value in 2015.

Percentage Change in Beginning of Year Balance Over Time

#############################################################
# plot for percent change in beginning of year balance 
#############################################################

endowment_sp_pct <- endowment_sp %>%
  pivot_wider(names_from = variable_name, values_from = value) %>%
  group_by(EIN) %>%
  arrange(fiscal_year) %>%
  mutate(pct_change = ifelse(
    lag(BeginningYearBalanceAmt, n =1) != 0,
    (BeginningYearBalanceAmt - lag(BeginningYearBalanceAmt, n =1))/
           lag(BeginningYearBalanceAmt, n =1),
    NA),
         sp500_change = (sp500 - lag(sp500, n =1))/
           lag(sp500, n =1) ) %>%
  ungroup()


top_50 <- endowment_sp_pct%>%
  filter(!is.na(BeginningYearBalanceAmt)) %>%
  group_by(EIN, organization_name) %>%
  summarize(m = max(BeginningYearBalanceAmt, na.rm=TRUE), .groups="drop") %>%
  mutate(r = rank(-m)) %>% 
  arrange(r) %>%
  head(50)

endowment_sp_pct %>%
  inner_join(top_50) %>%
  filter(year >=2013) %>%
  mutate(date = make_date(month=month,year=year)) %>%
  select(organization_name, EIN, sp500_change,pct_change, date,r) %>%
  pivot_longer(c(sp500_change, pct_change)) %>%
  mutate(name =case_when( 
    name == "pct_change" ~ "Percent Change in\nBeginning of Year Balance",
    name == "sp500_change" ~ "Percent Change in\nS&P 500")) %>%
  ggplot(aes(x =date, y = value, color = name)) +
  geom_line() +
  facet_wrap(~fct_reorder(organization_name,r), scales="free_y", ncol =6 ) +
  theme_c(legend.position ="top",
          plot.title = element_text(size = 18, face="bold", margin = margin(5,0,12,0))) +
  scale_color_manual(values = c("#3D9DD8", "#D8AD3D")) + 
  labs(y = "Percent Change",
       color = "",
       title = "Percent Change in Beginning of Year Balance\nCompared to Percent Change in the S&P 500",
       subtitle = "Top 50 Companies, by Size of Maximum Beginning Year Balance") +
  guides(color = guide_legend(override.aes = list(linewidth = 3)))

#############################################################
# plot for percent change in investment earnings/losses
#############################################################

endowment_sp_pct <- endowment_sp %>%
  pivot_wider(names_from = variable_name, values_from = value) %>%
  group_by(EIN) %>%
  arrange(fiscal_year) %>%
  mutate(pct_change = ifelse(
    lag(InvestmentEarningsOrLossesAmt, n =1) != 0,
    (InvestmentEarningsOrLossesAmt - lag(InvestmentEarningsOrLossesAmt, n =1))/
           lag(InvestmentEarningsOrLossesAmt, n =1),
    NA),
         sp500_change = (sp500 - lag(sp500, n =1))/
           lag(sp500, n =1) ) %>%
  ungroup()

# using same top 50 as before
endowment_sp_pct %>%
  inner_join(top_50) %>%
  filter(year >=2013) %>%
  mutate(date = make_date(month=month,year=year)) %>%
  select(organization_name, EIN, sp500_change,pct_change, date,r) %>%
  pivot_longer(c(sp500_change, pct_change)) %>%
  mutate(name =case_when( 
    name == "pct_change" ~ "Percent Change in\nInvestment Earnings or Losses",
    name == "sp500_change" ~ "Percent Change in\nS&P 500")) %>%
  ggplot(aes(x =date, y = value, color = name)) +
  geom_hline(yintercept = 0, alpha = .4, color="darkred") +
  geom_line() +
  facet_wrap(~fct_reorder(organization_name,r), scales="free_y") +
  theme_c(legend.position ="top",
          plot.title = element_text(size = 18, face="bold", margin = margin(5,0,12,0))) +
  scale_color_manual(values = c("#3D9DD8", "#D8AD3D")) + 
  labs(y = "Percent Change",
       color = "",
       title = "Percent Change in Investment Earnings or Losses\nCompared to Percent Change in the S&P 500",
       subtitle = "Top 50 Companies, by Size of Maximum Beginning Year Balance") +
  guides(color = guide_legend(override.aes = list(linewidth = 3))) 

Percent Change in Beginning of Year Balance Compared to S&P 500 From Earliest Date to 2020

Below, we compute the percentage change in endowment beginning of year balance for the earliest year the company had a value on record to the value in 2020. Then we compare this value to the percent change in S&P 500 from this same time interval.

 # endowment_sp %>%
 #  pivot_wider(names_from = variable_name, values_from = value) %>% 
 #   group_by(fiscal_year) %>%
 #   summarize(count_na = length(na.omit(BeginningYearBalanceAmt)))
 
 
pct_from_early <- endowment_sp %>%
  # only have S&P 500 back to 2013
  filter(fiscal_year >= 2013) %>%
  pivot_wider(names_from = variable_name, values_from = value) %>%
  # want to find minimum year where they have beginning of year balance data
  filter(!is.na(BeginningYearBalanceAmt) & BeginningYearBalanceAmt != 0) %>%
  group_by(EIN) %>%
  mutate(min_year = min(fiscal_year)) %>%
   filter(fiscal_year == min_year | fiscal_year == 2020)%>%
  group_by(EIN) %>%
  arrange(fiscal_year) %>%
  mutate(pct_change = ifelse(
    lag(BeginningYearBalanceAmt, n =1) != 0,
    (BeginningYearBalanceAmt - lag(BeginningYearBalanceAmt, n =1))/
           lag(BeginningYearBalanceAmt, n =1),
    NA),
         sp500_change = (sp500 - lag(sp500, n =1))/
           lag(sp500, n =1) ) %>%
  ungroup() %>%
  select(EIN, fiscal_year, organization_name, pct_change, sp500_change, BeginningYearBalanceAmt, sp500)


pct_from_early %>%
  mutate(difference = pct_change - sp500_change ) %>%
  filter(fiscal_year == 2020) %>%
 # filter(fiscal_year ==2020 & !is.na(difference)) %>%
  mutate(organization_name = fct_reorder(organization_name, difference)) %>%
  ggplot(aes(y = fct_reorder(organization_name, difference),
             x = difference)) +
  geom_bar(stat="identity") +
  labs(title = "Percent Change in Beginning of Year Balance\nMinus Percent Change in S&P 500",
       subtitle = paste0("Positive Values Indicate Endowment Did Better Relative to S&P 500\n",
                         "Negative Values Indicate Endowment Did Worse Relative to S&P 500"),
       y = "") +
  theme_c() +
  theme(plot.title = element_text(margin = margin(5,0,12,0),
                                    face="bold",
                                    size = 12,
                                  hjust = .5),
          plot.subtitle = element_text(size = 11, 
                                       face="italic", 
                                       hjust = .5))

pct_from_early <- endowment_sp %>%
  # only have S&P 500 back to 2013
  filter(fiscal_year >= 2013) %>%
  pivot_wider(names_from = variable_name, values_from = value) %>%
  # want to find minimum year where they have beginning of year balance data
  filter(!is.na(BeginningYearBalanceAmt) & BeginningYearBalanceAmt != 0) %>%
  group_by(EIN) %>%
  mutate(min_year = min(fiscal_year)) %>%
   filter(fiscal_year == min_year | fiscal_year == 2020)%>%
  group_by(EIN) %>%
  arrange(fiscal_year) %>%
  mutate(pct_change = ifelse(
    lag(BeginningYearBalanceAmt, n =1) != 0,
    (BeginningYearBalanceAmt - lag(BeginningYearBalanceAmt, n =1))/
           lag(BeginningYearBalanceAmt, n =1),
    NA),
         sp500_change = (sp500 - lag(sp500, n =1))/
           lag(sp500, n =1) ) %>%
  ungroup() %>%
  select(EIN, fiscal_year, organization_name, pct_change, sp500_change, BeginningYearBalanceAmt, sp500)


employees %>%
  mutate(comp_not_dir = CYSalariesCompEmpBnftPaidAmt - OffDirCompAmt) %>%
  filter(!is.na(CYSalariesCompEmpBnftPaidAmt) & !is.na(OffDirCompAmt) & OffDirCompAmt != 0) %>%
  group_by(EIN) %>%
  mutate(min_year = min(fiscal_year)) %>%
   filter(fiscal_year == min_year | fiscal_year == 2020)%>%
  group_by(EIN) %>%
  arrange(fiscal_year) %>%
  mutate(pct_change_comp_dir = ifelse(
    lag(OffDirCompAmt, n =1) != 0,
    (OffDirCompAmt - lag(OffDirCompAmt, n =1))/
           lag(OffDirCompAmt, n =1),
    NA),
        pct_change_comp_not_dir = ifelse(
    lag(comp_not_dir, n =1) != 0,
    (comp_not_dir - lag(comp_not_dir, n =1))/
           lag(comp_not_dir, n =1),
    NA),
    employee_ct_diff = EmployeeCnt - lag(EmployeeCnt, n =1)) %>%
  filter(fiscal_year == 2020) %>%
  ungroup() %>% 
  mutate(diff =pct_change_comp_dir - pct_change_comp_not_dir) %>%
  left_join(companies_to_ein) %>%
  filter(!is.na(diff)) %>%
  ggplot(aes(x =fct_reorder(organization_name, diff), y = diff, fill = employee_ct_diff)) +
  geom_bar(stat="identity") +
  coord_flip()


employees %>%
  mutate(comp_not_dir = CYSalariesCompEmpBnftPaidAmt - OffDirCompAmt) %>%
  select(fiscal_year,comp_not_dir, OffDirCompAmt, EIN ) %>%
  pivot_longer(c( comp_not_dir, OffDirCompAmt)) %>%
  ggplot(aes(x = fiscal_year, y = value, group=EIN)) +
  geom_line() +
  facet_wrap(~name)
## Percent Change in Investments Compared to S&P 500

 
pct_from_early <- endowment_sp %>%
  # only have S&P 500 back to 2013
  filter(fiscal_year >= 2013 & fiscal_year <= 2020) %>%
  pivot_wider(names_from = variable_name, values_from = value) %>%
  # want to find minimum year where they have beginning of year balance data
  filter(!is.na(BeginningYearBalanceAmt) & BeginningYearBalanceAmt != 0 & !is.na(InvestmentEarningsOrLossesAmt)) %>%
  group_by(EIN, organization_name) %>%
  mutate(net_earnings = sum(InvestmentEarningsOrLossesAmt, na.rm= TRUE),
            initial_endowment = BeginningYearBalanceAmt[which.min(fiscal_year)],
            min_fiscal_year = min(fiscal_year) )  %>%
  mutate(investments_endowment_ratio = net_earnings / initial_endowment)
  mutate(pct_change = ifelse(
    lag(InvestmentEarningsOrLossesAmt, n =1) != 0,
    (InvestmentEarningsOrLossesAmt - lag(InvestmentEarningsOrLossesAmt, n =1))/
           lag(InvestmentEarningsOrLossesAmt, n =1),
    NA),
         sp500_change = (sp500 - lag(sp500, n =1))/
           lag(sp500, n =1) ) %>%
  ungroup() %>%
  select(EIN, fiscal_year, organization_name, pct_change, sp500_change, InvestmentEarningsOrLossesAmt, sp500)


pct_from_early %>%
  mutate(difference = pct_change - sp500_change ) %>%
  filter(fiscal_year == 2020) %>%
 # filter(fiscal_year ==2020 & !is.na(difference)) %>%
  mutate(organization_name = fct_reorder(organization_name, difference)) %>%
  ggplot(aes(y = fct_reorder(organization_name, difference),
             x = difference)) +
  geom_bar(stat="identity") +
  labs(title = "Percent Change in Beginning of Year Balance Minus Percent Change in S&P 500",
       subtitle = paste0("Positive Values Indicate Endowment Did Better Relative to S&P 500\n",
                         "Negative Values Indicate Endowment Did Worse Relative to S&P 500"),
       y = "") +
  theme_c(plot.title = element_text(margin = margin(5,0,12,0),
                                    face="bold",
                                    size = 16))
plt <- endowment_sp %>%
  filter(variable_name %in% c("BeginningYearBalanceAmt")) %>%
  group_by(EIN) %>%
  mutate(m = median(value, na.rm= TRUE)) %>%
  ungroup() %>%
#  group_by(EIN) %>%
  mutate(tile = ntile(m,2),
         tilename = ifelse(tile == 1,
                           "EINs Below the Median",
                           "EINs Above the Median"))%>%
  # only EINS where all observations are NA will be dropped
  filter(!is.na(tilename)) %>%
  mutate(date = as_date(paste0(month, "-", year), format = "%m-%Y")) %>%
  mutate(normalized = value/sp500,
         not = value) %>%
  select(date, normalized, not,variable_name, organization_name, tilename) %>%
  pivot_longer(c(normalized,not)) %>%
  ggplot(aes(x=date, y = value, color = organization_name)) +
  geom_point() +
  geom_line() +
  facet_wrap(~tilename+name, scales="free", ncol = 2)+
  labs(title = "Normalizing by S&P 500") +
   theme_bw() + 
  viridis::scale_color_viridis(discrete=TRUE,
                                 option = "rocket",
                                 end = .9) +
    theme(plot.title = element_text(size = 14, 
                                      hjust = .5, face="bold",),
            plot.subtitle = element_text(hjust = .5, 
                                         face="italic",
                                         size = 14),
            axis.title = element_text(size = 13, 
                                      face = "bold"),
          strip.text = element_text(margin = margin(10,5,5,5,"pt")))





ggplotly(plt, margin = m, height = 500, width = 850) %>%
  partial_bundle()